package cn.showcon.firstapp.service;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentParser;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import org.apache.tika.parser.AutoDetectParser;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Small demo harness that extracts plain text from documents using
 * LangChain4j's {@link ApacheTikaDocumentParser} and prints it to standard output.
 *
 * @author Xue Lanbin
 */
public class DocumentParserService {

    /**
     * Runnable examples of Tika-backed parsing: one reads a classpath resource,
     * the other loads a document from the file system.
     */
    static class Tika1 {

        /** Classpath resource parsed by {@link #main1(String[])}. */
        private static final String CLASSPATH_RESOURCE = "test.text";

        /** File parsed by {@link #main(String[])} when no path is passed on the command line. */
        private static final String DEFAULT_DOCUMENT_PATH =
                "D:\\xuelb\\workspace\\ai-demo\\langchain1\\firstapp\\src\\main\\resources\\范文.docx";

        /**
         * Parses the classpath resource {@code test.text} and prints the extracted text.
         *
         * <p>To parse an arbitrary file instead, open an {@link InputStream} on it and
         * hand that stream to the parser in place of the classpath stream.
         *
         * @param args ignored
         * @throws IOException if the resource is not on the classpath or closing the stream fails
         */
        public static void main1(String[] args) throws IOException {
            String fileName = CLASSPATH_RESOURCE;

            // try-with-resources guarantees the stream is closed even when parsing throws.
            try (InputStream inputStream =
                         DocumentParserService.class.getClassLoader().getResourceAsStream(fileName)) {
                // getResourceAsStream signals "not found" with null rather than an exception;
                // fail here with a clear message instead of deep inside the parser.
                if (inputStream == null) {
                    throw new IOException("Classpath resource not found: " + fileName);
                }

                Document document = newTikaParser().parse(inputStream);
                System.out.println(document.text());
            }
        }

        /**
         * Loads a document from the file system and prints the extracted text.
         *
         * @param args optional; {@code args[0]} is the path of the document to parse. When
         *             absent, the built-in sample path is used, as in earlier versions.
         * @throws IOException declared for compatibility with existing callers
         */
        public static void main(String[] args) throws IOException {
            String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_DOCUMENT_PATH;

            Document document = FileSystemDocumentLoader.loadDocument(fileName, newTikaParser());

            System.out.println(document.text());
        }

        /**
         * Builds the parser shared by both entry points.
         *
         * @return a Tika document parser that creates its underlying parser via
         *         {@code AutoDetectParser::new}
         */
        private static DocumentParser newTikaParser() {
            // NOTE(review): the three null arguments presumably select the library's
            // defaults for the remaining collaborators - confirm against the
            // ApacheTikaDocumentParser constructor documentation.
            return new ApacheTikaDocumentParser(AutoDetectParser::new, null, null, null);
        }
    }
}
